* Machine-specific root: the Dropbox folder of the current operating-system user.
global username "`c(username)'"
global dropbox "/Users/${username}/Dropbox"
global master_file_path "${dropbox}/Engel_GFT/replication_files"

* input  : item-level expenditure data covering rounds 43 and 55
* output : folder receiving the price/share datasets and the bootstrap CPIs
global input "${master_file_path}/data/intermediate_data/R43R55"
global output "${master_file_path}/data/intermediate_data/Conventional_Price_Indices"

* Never pause the output window while the script runs.
set more off

**************************************************************
****      Part 1: Consumption and unit prices within each round
*Note: Prices are district specific (not districtXdecile specific)
*      Consumption shares vary across districtXdeciles
**************************************************************

if 1==1 {

  *(1) round 43
  * Builds, from item-level expenditure records, two files for round 43:
  *   $output/R43_district_X_decile_level_V4 : one row per state X sector X district X decile
  *   $output/R43_district_level_V4          : one row per state X sector X district
  * Each file holds median unit prices (raw and filled in), democratic budget
  * shares, the summed district weight and the coverage ratio.
  * NOTE(review): xtile() and wtmean() are not official egen functions; they
  * presumably come from user-written packages (egenmore / _gwtmean) - confirm
  * these are installed before running.
    set more off

    use "${input}/expenditures_item_level_R43R55_i_groupsV1.dta" if round==43, clear

    *drop households with flagged discrepancies between sum of exp. and mpce
    drop if flag_mpce==1

    *drop goods with no quantity data (a unit price needs a nonzero quantity)
    drop if quantity==. | quantity==0

    *Keep only goods in our 34 i groupings:
    drop if flag_deaton_drops==1

    replace concorded_itemlabel2=trim(concorded_itemlabel2)

    *Get rid of products beyond fuel and light.
    *Only round 43 records are in memory, so only the round 43 item-code cutoff
    *applies here. Records with a missing itemcode43 are dropped as well, because
    *a missing value compares as larger than any number.
    drop if round==43 & itemcode43>=480

    *calculate expenditures and quantities at the hhid X item level:
    collapse (sum) expenditure quantity, by(state43 district43 sector wt hhsize mpce hhid concorded_itemlabel2 i_groupsV1 g_groupsV3)

    *unit price, rounded to two decimals
    gen price=expenditure/quantity
    replace price=round(price, 0.01)

    *Angus' automatic test for unit price outliers:
    *drop unit prices whose log is more than 2 standard deviations away from the
    *item mean. A missing log price (price of zero or missing) gives a missing
    *deviation, which compares as larger than the threshold, so those rows are
    *dropped too whenever the item standard deviation is defined.
    gen log_price=log(price)
    egen sd_log_price=sd(log_price), by(concorded_itemlabel2)
    egen mean_log_price=mean(log_price), by(concorded_itemlabel2)
    gen diff=abs(log_price-mean_log_price)
    drop if diff>2*sd_log_price
    drop diff mean_log_price sd_log_price log_price

    *median unit price per item within state X sector and within district X sector
    bysort concorded_itemlabel2 state43 sector: egen price_state=median(price)
    replace price_state=round(price_state, 0.01)

    bysort concorded_itemlabel2 state43 district43 sector: egen price_district=median(price)
    replace price_district=round(price_district, 0.01)

    *sequential item number, used as the suffix of every wide variable below
    egen item_seq = group(concorded_itemlabel2), label
    egen i_groupsV1_seq=group(i_groupsV1), label

    *item_seq <-> item label crosswalk. It is saved to a tempfile only; nothing
    *in the visible part of this file reads it back.
    preserve
    duplicates drop item_seq concorded_itemlabel2 i_groupsV1 g_groupsV3, force
    tempfile item_seq
    save `item_seq'
    restore

    keep expenditure quantity price_state price_district state43 sector district43 hhid wt hhsize mpce item_seq

    *household total over the retained goods
    egen sum_exp=total(expenditure), by(state43 sector district43 hhid)

    *one row per household, one set of columns per item
    reshape wide expenditure quantity price_state price_district , i(state43 district43 sector wt hhsize mpce hhid sum_exp ) j(item_seq)

    *Fill in with 0 in case of no purchases:
    foreach var of varlist expenditure* quantity* {
        replace `var'=0 if `var'==.
    }

    *create household budget shares:
    foreach var of varlist expenditure* {
        local a=subinstr("`var'","expenditure","",1)
        gen share_`a'=(`var')/sum_exp
    }

    egen cover_hh=rowtotal(share_*)
    egen deaton_exp=rowtotal(expenditure*)
    gen deaton_exp_pc=deaton_exp/hhsize

    *Create district level budget shares (expenditure-weighted):
    *Total weighted expenditure by state, sector and district:
    bysort state43 sector district43: egen tot_exp_dist=total(sum_exp*wt)

    *Weighted expenditure on each good by state, sector and district:
    foreach var of varlist expenditure* {
        local a=subinstr("`var'","expenditure","",1)
        egen exp_`a'=total(`var'*wt) ,  by(state43 sector district43)
        gen sharedist_`a'=exp_`a'/tot_exp_dist
    }
    drop exp_*

    *create 9 deciles of income for each districtXsector and for each stateXsector (for democratic CPI) (centered at 6-15, 16-25, etc.):
    *households in the bottom and top of 20 quantile groups are left out of the
    *9 groups and end up with a missing decile
    egen decile_st_bounds= xtile(sum_exp), by(state43 sector) nq(20)
    egen decile_st= xtile(sum_exp) if decile_st_bounds!=1 & decile_st_bounds!=20, by(state43 sector) nq(9)
    drop decile_st_bounds

    egen decile_dist_bounds= xtile(sum_exp), by(state43 sector district43) nq(20)
    egen decile_dist= xtile(sum_exp) if decile_dist_bounds!=1 & decile_dist_bounds!=20, by(state43 sector district43) nq(9)
    drop decile_dist_bounds

    *Calculate democratic shares (weighted means of household shares) at
    *districtXdecile and district level. The state variable is spelled out in
    *full so the by-groups do not depend on variable abbreviation.
    foreach var of varlist share_* {
        local a=subinstr("`var'","share_","",1)
        *Democratic weights district X decile level:
        bysort state43 sector district43 decile_dist: egen ave_share_decile_dist_`a'=wtmean(`var'), weight(wt)
        *Democratic weights district level:
        bysort state43 sector district43 : egen ave_share_dist_`a'=wtmean(`var'), weight(wt)
    }

    *fill district level prices: after the reshape a price is present only on
    *rows of households that bought the item, so spread it to the whole district
    foreach var of varlist price_district* {
        local a=subinstr("`var'","price_district","",1)
        bysort state43 sector district43: egen price_dist_`a'=max(`var')
        drop `var'
    }

    *fill state level prices:
    foreach var of varlist price_state* {
        local a=subinstr("`var'","price_state","",1)
        *can use the max since they are all the same:
        bysort state43 sector : egen price_st_`a'=max(`var')
        drop `var'
    }

    egen dist_wt=total(wt), by(state43 sector district43)

    *Calculate weighted average share of outlays on deaton goods by decile district
    *(capped at 1):
    gen coverage_hhd=deaton_exp_pc/mpce
    bysort state43 sector district43 decile_dist: egen coverage_decile_dist=wtmean(coverage_hhd), weight(wt)
    replace coverage_decile_dist=1 if coverage_decile_dist>=1

    *Calculate weighted average share of outlays on deaton goods by district
    *(capped at 1):
    bysort state43 sector district43: egen coverage_dist=wtmean(coverage_hhd), weight(wt)
    replace coverage_dist=1 if coverage_dist>=1

    tempfile inputs
    save `inputs'

    *Write the two output files from the same household-level inputs.
    foreach level in district_X_decile district {

        use `inputs', clear

        if "`level'"=="district_X_decile" {
            *districtXdecile dataset (households without a decile are left out):
            duplicates drop state43 sector district43 decile_dist, force
            drop if decile_dist==.
            keep state43 sector district43 decile_dist dist_wt price_st_* price_dist_* ave_share_decile_dist_*  coverage_decile_dist
        }
        else {
            *district dataset:
            duplicates drop state43 sector district43, force
            keep state43 sector district43 dist_wt price_st_* price_dist_* ave_share_dist_*  coverage_dist
        }

        *Fill in state level prices with national prices if needed.
        *The national price is the plain mean of the state price over the rows
        *of the current file within sector.
        foreach var of varlist price_st_* {
            local a=subinstr("`var'","price_st_","",1)
            bysort sector : egen price_nat_`a'=mean(`var')
            replace `var'=price_nat_`a' if `var'==.
        }

        *Fill in district prices with state level prices above:
        foreach var of varlist price_dist_* {
            local a=subinstr("`var'","price_dist_","",1)
            gen price_fill_dist_`a'=`var'
            replace price_fill_dist_`a'=price_st_`a' if  price_fill_dist_`a'==.
        }

        drop price_nat_* price_st_*
        save "$output/R43_`level'_level_V4",  replace
    }

}










if 1==1 {

  *(1) round 55
  * Builds, from item-level expenditure records, two files for round 55:
  *   $output/R55_district_X_decile_level_V4 : one row per state X sector X district X decile
  *   $output/R55_district_level_V4          : one row per state X sector X district
  * Each file holds median unit prices (raw and filled in), democratic budget
  * shares, the summed district weight and the coverage ratio.
  * NOTE(review): xtile() and wtmean() are not official egen functions; they
  * presumably come from user-written packages (egenmore / _gwtmean) - confirm
  * these are installed before running.
    set more off

    use "${input}/expenditures_item_level_R43R55_i_groupsV1.dta" if round==55, clear

    *for this round the household size is taken from Household_size
    drop hhsize
    rename Household_size hhsize

    *drop households with flagged discrepancies between sum of exp. and mpce
    drop if flag_mpce==1

    *drop goods with no quantity data (a unit price needs a nonzero quantity)
    drop if quantity==. | quantity==0

    *Keep only goods in our 34 i groupings:
    drop if flag_deaton_drops==1

    replace concorded_itemlabel2=trim(concorded_itemlabel2)

    *Make list of goods consistent over rounds:
    *note: other cereals only drops in R55:
    drop if concorded_itemlabel2=="Other cereals"

    *Get rid of products beyond fuel and light.
    *Only round 55 records are in memory, so only the round 55 serial-number
    *cutoff applies here. Records with a missing Srl_no_of_item are dropped as
    *well, because a missing value compares as larger than any number.
    drop if round==55 & Srl_no_of_item>=360

    *rescale quantities and round to two decimals
    *NOTE(review): presumably round 55 quantities are recorded in hundredths of
    *the round 43 unit - confirm. A quantity that rounds to zero gives a missing
    *unit price below.
    replace quantity=quantity/100
    replace quantity=round(quantity, 0.01)

    *calculate expenditures and quantities at the hhid X item level:
    collapse (sum) expenditure quantity, by(state43 district43 sector wt hhsize mpce hhid concorded_itemlabel2 i_groupsV1 g_groupsV3)

    *unit price, rounded to two decimals
    gen price=expenditure/quantity
    replace price=round(price, 0.01)

    *Angus' automatic test for unit price outliers:
    *drop unit prices whose log is more than 2 standard deviations away from the
    *item mean. A missing log price (price of zero or missing) gives a missing
    *deviation, which compares as larger than the threshold, so those rows are
    *dropped too whenever the item standard deviation is defined.
    gen log_price=log(price)
    egen sd_log_price=sd(log_price), by(concorded_itemlabel2)
    egen mean_log_price=mean(log_price), by(concorded_itemlabel2)
    gen diff=abs(log_price-mean_log_price)
    drop if diff>2*sd_log_price
    drop diff mean_log_price sd_log_price log_price

    *median unit price per item within state X sector and within district X sector
    bysort concorded_itemlabel2 state43 sector: egen price_state=median(price)
    replace price_state=round(price_state, 0.01)

    bysort concorded_itemlabel2 state43 district43 sector: egen price_district=median(price)
    replace price_district=round(price_district, 0.01)

    *sequential item number, used as the suffix of every wide variable below
    egen item_seq = group(concorded_itemlabel2), label
    egen i_groupsV1_seq=group(i_groupsV1), label

    *item_seq <-> item label crosswalk. It is saved to a tempfile only; nothing
    *in the visible part of this file reads it back.
    preserve
    duplicates drop item_seq concorded_itemlabel2 i_groupsV1 g_groupsV3, force
    tempfile item_seq
    save `item_seq'
    restore

    keep expenditure quantity price_state price_district state43 sector district43 hhid wt hhsize mpce item_seq

    *household total over the retained goods
    egen sum_exp=total(expenditure), by(state43 sector district43 hhid)

    *one row per household, one set of columns per item
    reshape wide expenditure quantity price_state price_district , i(state43 district43 sector wt hhsize mpce hhid sum_exp) j(item_seq)

    *Fill in with 0 in case of no purchases:
    foreach var of varlist expenditure* quantity* {
        replace `var'=0 if `var'==.
    }

    *create household budget shares:
    foreach var of varlist expenditure* {
        local a=subinstr("`var'","expenditure","",1)
        gen share_`a'=(`var')/sum_exp
    }

    egen cover_hh=rowtotal(share_*)

    egen deaton_exp=rowtotal(expenditure*)
    gen deaton_exp_pc=deaton_exp/hhsize

    *Create district level budget shares (expenditure-weighted):
    *Total weighted expenditure by state, sector and district:
    bysort state43 sector district43: egen tot_exp_dist=total(sum_exp*wt)

    *Weighted expenditure on each good by state, sector and district:
    foreach var of varlist expenditure* {
        local a=subinstr("`var'","expenditure","",1)
        egen exp_`a'=total(`var'*wt) ,  by(state43 sector district43)
        gen sharedist_`a'=exp_`a'/tot_exp_dist
    }
    drop exp_*

    *create 9 deciles of income for each districtXsector and for each stateXsector (for democratic CPI) (centered at 6-15, 16-25, etc.):
    *households in the bottom and top of 20 quantile groups are left out of the
    *9 groups and end up with a missing decile
    egen decile_st_bounds= xtile(sum_exp), by(state43 sector) nq(20)
    egen decile_st= xtile(sum_exp) if decile_st_bounds!=1 & decile_st_bounds!=20, by(state43 sector) nq(9)
    drop decile_st_bounds

    egen decile_dist_bounds= xtile(sum_exp), by(state43 sector district43) nq(20)
    egen decile_dist= xtile(sum_exp) if decile_dist_bounds!=1 & decile_dist_bounds!=20, by(state43 sector district43) nq(9)
    drop decile_dist_bounds

    *Calculate democratic shares (weighted means of household shares) at
    *districtXdecile and district level. The state variable is spelled out in
    *full so the by-groups do not depend on variable abbreviation.
    foreach var of varlist share_* {
        local a=subinstr("`var'","share_","",1)
        *Democratic weights district X decile level:
        bysort state43 sector district43 decile_dist: egen ave_share_decile_dist_`a'=wtmean(`var'), weight(wt)
        *Democratic weights district level:
        bysort state43 sector district43 : egen ave_share_dist_`a'=wtmean(`var'), weight(wt)
    }

    *fill district level prices: after the reshape a price is present only on
    *rows of households that bought the item, so spread it to the whole district
    foreach var of varlist price_district* {
        local a=subinstr("`var'","price_district","",1)
        bysort state43 sector district43: egen price_dist_`a'=max(`var')
        drop `var'
    }

    *fill state level prices:
    foreach var of varlist price_state* {
        local a=subinstr("`var'","price_state","",1)
        *can use the max since they are all the same:
        bysort state43 sector : egen price_st_`a'=max(`var')
        drop `var'
    }

    egen dist_wt=total(wt), by(state43 sector district43)

    *Calculate weighted average share of outlays on deaton goods by decile district
    *(capped at 1):
    gen coverage_hhd=deaton_exp_pc/mpce
    bysort state43 sector district43 decile_dist: egen coverage_decile_dist=wtmean(coverage_hhd), weight(wt)
    replace coverage_decile_dist=1 if coverage_decile_dist>=1

    *Calculate weighted average share of outlays on deaton goods by district
    *(capped at 1):
    bysort state43 sector district43: egen coverage_dist=wtmean(coverage_hhd), weight(wt)
    replace coverage_dist=1 if coverage_dist>=1

    tempfile inputs
    save `inputs'

    *Write the two output files from the same household-level inputs.
    foreach level in district_X_decile district {

        use `inputs', clear

        if "`level'"=="district_X_decile" {
            *districtXdecile dataset (households without a decile are left out):
            duplicates drop state43 sector district43 decile_dist, force
            drop if decile_dist==.
            keep state43 sector district43 decile_dist dist_wt  coverage_decile_dist price_st_* price_dist_* ave_share_decile_dist_*
        }
        else {
            *district dataset:
            duplicates drop state43 sector district43, force
            keep state43 sector district43 dist_wt price_st_* price_dist_* ave_share_dist_*  coverage_dist
        }

        *Fill in state level prices with national prices if needed.
        *The national price is the plain mean of the state price over the rows
        *of the current file within sector.
        foreach var of varlist price_st_* {
            local a=subinstr("`var'","price_st_","",1)
            bysort sector : egen price_nat_`a'=mean(`var')
            replace `var'=price_nat_`a' if `var'==.
        }

        *Fill in district prices with state level prices above:
        foreach var of varlist price_dist_* {
            local a=subinstr("`var'","price_dist_","",1)
            gen price_fill_dist_`a'=`var'
            replace price_fill_dist_`a'=price_st_`a' if  price_fill_dist_`a'==.
        }

        drop price_nat_* price_st_*
        save "$output/R55_`level'_level_V4",  replace
    }

}









**************************************************************
****        Part 2: Construct decileXdistrict CPI: across rounds
****
*State level (Paasche and Laspeyres, plutocratic and democratic)
*District level (Paasche and Laspeyres, plutocratic and democratic)
*StateXdecile (Paasche and Laspeyres, democratic)
*DistrictXdecile (Paasche and Laspeyres, democratic)
**************************************************************


**************************************************************
*DistrictXDecile level (Paasche and Laspeyres, democratic), R43, R55
**************************************************************
if 1==1{

    * Bootstraps democratic Laspeyres and Paasche CPIs (round 43 -> round 55) at
    * the district X decile level for the rural sector. Each of the 1000
    * replications resamples district X decile rows within market_id and is
    * saved to $output/bootstrap/CPI_district_X_decile_level_V4_bs<b>.
    local initial 43
    local final 55

    set more off

    * NOTE(review): no -set seed- is visible in this file before -bsample-, so
    * the draws are only reproducible if a seed is set by whatever runs this
    * script - confirm.

    * ---- Round-`final' data: prefix every non-key variable with its round ----
    use  "$output/R`final'_district_X_decile_level_V4", clear

    quietly ds state43 sector district43 decile_dist, not
    local round_vars `r(varlist)'
    foreach var of local round_vars {
        rename `var' _`final'_`var'
    }

    * ---- Add the initial round (same file-name case as the saved file) ----
    merge 1:1 state43 sector district43 decile_dist using "$output/R`initial'_district_X_decile_level_V4"

    * whatever is neither a key, _merge, nor already prefixed came from the
    * initial-round file
    quietly ds state43 sector district43 decile_dist _merge _`final'_*, not
    local round_vars `r(varlist)'
    foreach var of local round_vars {
        rename `var' _`initial'_`var'
    }
    keep if _merge==3
    drop _merge

    *relative prices (final / initial), item by item:
    foreach var of varlist _`initial'_price_dist_* {
        local a=subinstr("`var'","_`initial'_price_dist_","",1)
        gen price_`final'_`initial'_i`a'= _`final'_price_dist_`a'/`var'
    }

    *relative prices (filled in list):
    foreach var of varlist _`initial'_price_fill_dist_* {
        local a=subinstr("`var'","_`initial'_price_fill_dist_","",1)
        gen price_fill_`final'_`initial'_i`a'= _`final'_price_fill_dist_`a'/`var'
    }

    *Since CPIs are to be calculated across rural sector only:
    keep if sector=="Rural"
    tempfile aux_prices
    save `aux_prices'

    *Bring in the market_id variable from rural hh shares dataset:
    * NOTE(review): duplicates are dropped on market_id while the merge below is
    * keyed on state43 district43. If one market_id covers several districts,
    * only one of those districts survives keep(3) - confirm this is intended.
    use state43 district43 market_id using "${master_file_path}/data/intermediate_data/hh_shares/hh_shares_43_55V1_DM_G108_15_rural_SEC.dta", clear
    duplicates drop market_id, force
    tempfile market_id
    save `market_id'

    use `aux_prices', clear
    *bring in market_ids (matched rows only):
    merge m:1 state43 district43 using `market_id', nogen keep(3)

    *Inputs shared by all bootstrap samples:
    tempfile cpi_inputs
    save `cpi_inputs'

    *make sure the output folder exists (no effect when it already does)
    capture mkdir "$output/bootstrap"

    *CREATE BOOTSTRAP SAMPLES:
    forvalues b=1(1)1000 {

        use `cpi_inputs',clear
        bsample, strata(market_id)

        sort state43 sector district43  decile_dist

        *District X decile level Laspeyres, democratic:
        *sum over items of relative price times initial-round share
        foreach var of varlist price_`final'_`initial'_i* {
            local a=subinstr("`var'","price_`final'_`initial'_i","",1)
            gen cpi_i`a'_`initial'_`final'=`var'*_`initial'_ave_share_decile_dist_`a'
        }
        egen Lcpi_`initial'_`final'_decile=rowtotal(cpi_i*_`initial'_`final'), mis
        drop cpi_i*_`initial'_`final'

        *Now for filled in prices
        foreach var of varlist price_fill_`final'_`initial'_i* {
            local a=subinstr("`var'","price_fill_`final'_`initial'_i","",1)
            gen cpi_fill_i`a'_`initial'_`final'=`var'*_`initial'_ave_share_decile_dist_`a'
        }
        egen Lcpi_fill_`initial'_`final'_decile=rowtotal(cpi_fill_i*_`initial'_`final'), mis
        drop cpi_fill_i*_`initial'_`final'

        *District X decile level Paasche, democratic:
        *inverse of the sum over items of inverse relative price times final-round share
        foreach var of varlist price_`final'_`initial'_i* {
            local a=subinstr("`var'","price_`final'_`initial'_i","",1)
            gen cpi_i`a'_`initial'_`final'=(1/`var')*_`final'_ave_share_decile_dist_`a'
        }
        egen Pcpi_`initial'_`final'_decile_x=rowtotal(cpi_i*_`initial'_`final'), mis
        gen Pcpi_`initial'_`final'_decile=1/Pcpi_`initial'_`final'_decile_x
        drop Pcpi_`initial'_`final'_decile_x
        drop cpi_i*_`initial'_`final'

        *Now using filled in prices:
        foreach var of varlist price_fill_`final'_`initial'_i* {
            local a=subinstr("`var'","price_fill_`final'_`initial'_i","",1)
            gen cpi_fill_i`a'_`initial'_`final'=(1/`var')*_`final'_ave_share_decile_dist_`a'
        }
        egen Pcpi_fill_`initial'_`final'_decile_x=rowtotal(cpi_fill_i*_`initial'_`final'), mis
        gen Pcpi_fill_`initial'_`final'_decile=1/Pcpi_fill_`initial'_`final'_decile_x
        drop Pcpi_fill_`initial'_`final'_decile_x

        *Keep identifiers, weights, coverage and the four districtXdecile CPIs:
        keep market_id state43 sector district43 decile_dist _*_dist_wt   _*_coverage_decile_dist Lcpi_`initial'_`final'_decile Lcpi_fill_`initial'_`final'_decile Pcpi_`initial'_`final'_decile Pcpi_fill_`initial'_`final'_decile

        save "$output/bootstrap/CPI_district_X_decile_level_V4_bs`b'", replace
    }


*end if 1==1:
}








**************************************************************
*District level (Paasche and Laspeyres, democratic), R43, R55
**************************************************************
if 1==1{

    * Bootstraps democratic Laspeyres and Paasche CPIs (round 43 -> round 55) at
    * the district level for the rural sector. Each of the 1000 replications
    * resamples district rows within market_id and is saved to
    * $output/bootstrap/CPI_district_level_V4_bs<b>.
    local initial 43
    local final 55

    set more off

    * NOTE(review): no -set seed- is visible in this file before -bsample-, so
    * the draws are only reproducible if a seed is set by whatever runs this
    * script - confirm.

    * ---- Round-`final' data: prefix every non-key variable with its round ----
    use  "$output/R`final'_district_level_V4", clear

    quietly ds state43 sector district43, not
    local round_vars `r(varlist)'
    foreach var of local round_vars {
        rename `var' _`final'_`var'
    }

    * ---- Add the initial round (same file-name case as the saved file) ----
    merge 1:1 state43 sector district43 using "$output/R`initial'_district_level_V4"

    * whatever is neither a key, _merge, nor already prefixed came from the
    * initial-round file
    quietly ds state43 sector district43 _merge _`final'_*, not
    local round_vars `r(varlist)'
    foreach var of local round_vars {
        rename `var' _`initial'_`var'
    }
    keep if _merge==3
    drop _merge

    *relative prices (final / initial), item by item:
    foreach var of varlist _`initial'_price_dist_* {
        local a=subinstr("`var'","_`initial'_price_dist_","",1)
        gen price_`final'_`initial'_i`a'= _`final'_price_dist_`a'/`var'
    }

    *relative prices (filled in list):
    foreach var of varlist _`initial'_price_fill_dist_* {
        local a=subinstr("`var'","_`initial'_price_fill_dist_","",1)
        gen price_fill_`final'_`initial'_i`a'= _`final'_price_fill_dist_`a'/`var'
    }

    *Since CPIs are to be calculated across rural sector only:
    keep if sector=="Rural"
    tempfile aux_prices
    save `aux_prices'

    *Bring in the market_id variable from rural hh shares dataset:
    * NOTE(review): duplicates are dropped on market_id while the merge below is
    * keyed on state43 district43. If one market_id covers several districts,
    * only one of those districts survives keep(3) - confirm this is intended.
    use state43 district43 market_id using "${master_file_path}/data/intermediate_data/hh_shares/hh_shares_43_55V1_DM_G108_15_rural_SEC.dta", clear
    duplicates drop market_id, force
    tempfile market_id
    save `market_id'

    use `aux_prices', clear
    *bring in market_ids (matched rows only):
    merge m:1 state43 district43 using `market_id', nogen keep(3)

    *Inputs shared by all bootstrap samples:
    tempfile cpi_inputs
    save `cpi_inputs'

    *make sure the output folder exists (no effect when it already does)
    capture mkdir "$output/bootstrap"

    *CREATE BOOTSTRAP SAMPLES:
    forvalues b=1(1)1000 {

        use `cpi_inputs',clear
        bsample, strata(market_id)

        sort state43 sector district43

        *District level Laspeyres, democratic:
        *sum over items of relative price times initial-round share
        foreach var of varlist price_`final'_`initial'_i* {
            local a=subinstr("`var'","price_`final'_`initial'_i","",1)
            gen cpi_i`a'_`initial'_`final'=`var'*_`initial'_ave_share_dist_`a'
        }
        egen Lcpi_`initial'_`final'=rowtotal(cpi_i*_`initial'_`final'), mis
        drop cpi_i*_`initial'_`final'

        *Now for filled in prices
        foreach var of varlist price_fill_`final'_`initial'_i* {
            local a=subinstr("`var'","price_fill_`final'_`initial'_i","",1)
            gen cpi_fill_i`a'_`initial'_`final'=`var'*_`initial'_ave_share_dist_`a'
        }
        egen Lcpi_fill_`initial'_`final'=rowtotal(cpi_fill_i*_`initial'_`final'), mis
        drop cpi_fill_i*_`initial'_`final'

        *District level Paasche, democratic:
        *inverse of the sum over items of inverse relative price times final-round share
        foreach var of varlist price_`final'_`initial'_i* {
            local a=subinstr("`var'","price_`final'_`initial'_i","",1)
            gen cpi_i`a'_`initial'_`final'=(1/`var')*_`final'_ave_share_dist_`a'
        }
        egen Pcpi_`initial'_`final'_x=rowtotal(cpi_i*_`initial'_`final'), mis
        gen Pcpi_`initial'_`final'=1/Pcpi_`initial'_`final'_x
        drop Pcpi_`initial'_`final'_x
        drop cpi_i*_`initial'_`final'

        *Now using filled in prices:
        foreach var of varlist price_fill_`final'_`initial'_i* {
            local a=subinstr("`var'","price_fill_`final'_`initial'_i","",1)
            gen cpi_fill_i`a'_`initial'_`final'=(1/`var')*_`final'_ave_share_dist_`a'
        }
        egen Pcpi_fill_`initial'_`final'_x=rowtotal(cpi_fill_i*_`initial'_`final'), mis
        gen Pcpi_fill_`initial'_`final'=1/Pcpi_fill_`initial'_`final'_x
        drop Pcpi_fill_`initial'_`final'_x

        *Keep identifiers, weights, coverage and the four district CPIs:
        keep market_id state43 sector district43  _*_dist_wt   _*_coverage_dist Lcpi_`initial'_`final' Lcpi_fill_`initial'_`final' Pcpi_`initial'_`final' Pcpi_fill_`initial'_`final'

        save "$output/bootstrap/CPI_district_level_V4_bs`b'", replace
    }


*end if 1==1:
}



